//Purpose: This do-file applies corrections to "appended_full clean_updated", the full dataset from the endline data collection and tracking surveys
//Date created: 13/03/2019
//Created by: Jean Aime Nsabimana jansabimana@poverty-action.org
// Coding
	*m_var : missing cases
	*i_var : inconsistencies (ranges,internal logic, etc.)
	*s_var : skipping patterns
	*o_var : outliers -1.5*IQR<var<1.5*IQR
	*u_var : un-categorized discrepancies
	*-99: missing values
	*-55: No answer/Don't remember
//Stata version: 15


*Section 1. SCHOOL IDENTIFICATION 
*===============================================================================


* Session setup: start from an empty session and switch off -more- pauses
clear all
set more off

* Work from the raw-data folder and load the appended endline/tracking dataset
cd "[directory]/Raw_data"
use "appended_full clean_updated.dta"

* Rows with an empty instancename contain no data, so only non-empty ones are kept
keep if instancename != ""

* Location corrections (100 province, 101 district, 102 sector, 103 cell, 104 village)
********************************************************************************
* All corrections are keyed on studentid_202; each line touches one location
* variable for the listed student(s).

* Student 50405418: every level of the location hierarchy is re-coded
replace province_201 = 1  if studentid_202 == 50405418
replace district_201 = 3  if studentid_202 == 50405418
replace sector_201   = 29 if studentid_202 == 50405418
replace cell_201     = 50 if studentid_202 == 50405418
replace village_201  = 51 if studentid_202 == 50405418

*100 province : remaining corrections
replace province_201 = 3 if inlist(studentid_202, 3051436, 3501017)

*104 village : remaining corrections
replace village_201 = 12246 if inlist(studentid_202, 5031054, 5031056)
replace village_201 = 9347  if studentid_202 == 50305316
replace village_201 = 9557  if studentid_202 == 4031157
replace village_201 = 8637  if studentid_202 == 50303216
replace village_201 = 5135  if studentid_202 == 3011031
replace village_201 = 9358  if studentid_202 == 3041098




*group_107 : treatment vs. control
********************************************************************************
* Purpose: fill missing group_107 values from group_107_add, which is brought in
* from groups_corr.dta by a 1:1 merge on studentid_202, then apply one manual fix.

* -merge- aborts with r(110) if a variable named _merge already exists in memory.
* Dropping any leftover copy first makes this step safe to run whether or not the
* loaded dataset still carries _merge from an earlier merge; -capture- makes the
* drop a no-op when the variable is absent.
capture drop _merge

* NOTE(review): the path below is specific to one user's machine; confirm or adapt
* it before running elsewhere. The previously used location is kept for reference.
*merge 1:1 studentid_202 using "C:\Users\POkull\Box Sync\IPA_RWA_Projects\IPA_RWA_Project_Educate\07_Questionnaires&Data\06 Endline_Quantitative\Data\Raw_data\Resultant_data sets\groups_corr.dta" // merging some of the group data to data in memory from the baseline sheet
merge 1:1 studentid_202 using "C:\Users\ANasabimana\Box Sync\IPA_RWA_Project_Educate\07_Questionnaires&Data\06 Endline_Quantitative\Data\Raw_data\Resultant_data sets\groups_corr.dta" // merging some of the group data to data in memory from the baseline sheet

* Place the merged-in column next to the original so the two are easy to compare
order group_107_add, after(group_107)

* Only missing group assignments are filled; existing values are left untouched
replace group_107 = group_107_add if group_107 == .

* Manual correction for a single student
replace group_107 = 1 if studentid_202 == 40111210

* _merge == 2 marks observations found only in groups_corr.dta; they are removed
drop if _merge == 2

* The helper column is no longer needed once group_107 has been filled
drop group_107_add

*Section 2. STUDENT IDENTIFICATION  
*===============================================================================
 
	*202 Part of boarding student 
*rename board_day_203 board_day_202
//The above question was not asked of dropout students (dropout=1), so SurveyCTO recorded their answers as missing values

	*203How far is home 
//The above question was not asked of dropout students (dropout=1), so SurveyCTO recorded their answers as missing values

	*204means of transport : trans_204

//The above question was not asked of dropout students (dropout=1), hence the missing values. Some students answered -55 (No answer), which is the source of the inconsistencies


* SECTION 3. SOCIO ECONOMIC BACKGROUND CHARACTERISTICS
* Manual corrections keyed on studentid_202. Students who receive the same
* corrected value are grouped into a single -replace- using inlist().


	*age of the respondent : age_300
replace age_300 = 17 if inlist(studentid_202, 30211711, 3021177)
replace age_300 = 18 if inlist(studentid_202, 3011031, 3021175, 3021176)
replace age_300 = 19 if inlist(studentid_202, ///
	3021171, 30211710, 30211715, 3021174, 3021179)
replace age_300 = 20 if inlist(studentid_202, 30211712, 30211714, 3021173)
replace age_300 = 21 if studentid_202 == 3021178
replace age_300 = 22 if studentid_202 == 3021172
//Note: this question was not asked of baseline students; their age (merged in from the baseline dataset) was calculated in 2016


	*301 gender : gender_301
replace gender_301 = 1 if inlist(studentid_202, 30211712, 3021178)
replace gender_301 = 2 if inlist(studentid_202, ///
	3021171, 30211710, 30211711, 30211714, 30211715, 3021172, ///
	3021173, 3021174, 3021175, 3021176, 3021177, 3021179)


	*303 Main material of house floor : floor_mat_303
replace floor_mat_303 = 3 if studentid_202 == 3021171
replace floor_mat_303 = 4 if studentid_202 == 4031041
replace floor_mat_303 = 1 if inlist(studentid_202, ///
	30211710, 30211711, 30211712, 30211714, 30211715, 3021172, 3021173, ///
	3021174, 3021175, 3021176, 3021177, 3021178, 3021179)


	*311 Does any member own a business : business_311
//Answer choices: 1 "Yes" 0 "No" and 2: Don't know answer
replace business_311 = 0 if inlist(studentid_202, ///
	3021171, 30211710, 30211711, 30211712, 30211714, 30211715, 3021172, ///
	3021173, 3021174, 3021175, 3021176, 3021177, 3021178, 3021179)


* Section 4. Academic Background & Performance.
**==================================================

*rename option_500 opt_400e

* (Re)define the value label option_500. Codes 11-23 are the extra answer
* choices added from the other-recorded responses.
* NOTE(review): no -label values- command is visible in this file, so the label
* only shows up on a variable that already has option_500 attached - confirm.
label define option_500 ///
	1  "Physics - Chemistry - Mathematics (PCM)" ///
	2  "Physics - Chemistry - Biology (PCB)" ///
	3  "Mathematics - Chemistry - Biology (MCB)" ///
	4  "Biology - Chemistry - Geography (BCG)" ///
	5  "Mathematics - Physics - Geography (MPG)" ///
	6  "Mathematics - Physics - Computer Science (MPC)" ///
	7  "Mathematics – Economics – Geography (MEG)" ///
	8  "History – Economic – Geography (HEG)" ///
	9  "History – Geography – Literature (HGL)" ///
	10 "Other" ///
	11 "Maths-Computer-Economics(MCE)" ///
	12 "English-Kinyarwanda-Kiswahili(EKK)" ///
	13 "Litterature-Economics-Geography (LEG)" ///
	14 "Litterature- French-Kinyarwanda(LFK)" ///
	15 "Literature-kiswahili and kinyarwanda(LKK)" ///
	16 "History-Economics-Literature(HEL)" ///
	17 "Construction" ///
	18 "Tourism and Hotel Operations" ///
	19 "Electrical and Mechanical Engineering" ///
	20 "Accounting" ///
	21 "Agronomy and Veterinary" ///
	22 "Telecomunications" ///
	23 "None" ///
	, replace


	*404 Aggregate mark in last prom exam
*rename aggr_506 aggr_404

* System-missing (.) aggregate marks are recoded to -99, the file's missing-value code
replace aggr_404 = -99 if aggr_404 == .

* Section 5. Income, Business and employment

*rename howmuch_401a amt_503

*if earn_500e!=4 {

* amt_503 is set to -99 (the file's missing-value code) for the listed students
replace amt_503 = -99 if inlist(studentid_202, ///
	50705515, 450129, 451119, 40312317, 4030216, 5070557, ///
	5070542, 50703419, 4030378, 50701010, 50710119)

* Final tidy-up: remove helper/identifier variables, including the merge key
* studentid_202 and the _merge indicator, then arrange the column order.
drop student_code dup _merge studentid_202
order moth_occup_309_other - bus_info_212relationship_312_oth, after (bus_info_212relationship_312)
order school - family_name_b, after(loc_201)
order uniqueid, after(schoolid)


* Write the cleaned endline dataset, overwriting any existing copy
save "[directory]\student_endline_clean.dta", replace


